First we retrieve the metaviper scores, the latent variable (LV) scores, and the Random Forest (RF) weights from Synapse. We then keep only the LVs that were selected by the random forest.
# Get immune predictions: download the whole Synapse table referenced by
# `mp_scores` and keep only the rows whose isCellLine value is not 'TRUE'.
# Note: subset() also drops rows where isCellLine is NA.
dtab <- subset(
  synapser::synTableQuery(paste('select * from', mp_scores))$asDataFrame(),
  isCellLine != 'TRUE'
)
##
Building the CSV... [######--------------]30.24% 45381/150072
Building the CSV... [######--------------]30.24% 45381/150072
Building the CSV... [####################]100.00% 150072/150072 Done...
Downloading [##------------------]7.96% 2.0MB/25.1MB (1.7MB/s) Job-103158846784545651976749201.csv
Downloading [###-----------------]15.92% 4.0MB/25.1MB (1.8MB/s) Job-103158846784545651976749201.csv
Downloading [#####---------------]23.88% 6.0MB/25.1MB (1.8MB/s) Job-103158846784545651976749201.csv
Downloading [######--------------]31.85% 8.0MB/25.1MB (1.9MB/s) Job-103158846784545651976749201.csv
Downloading [########------------]39.81% 10.0MB/25.1MB (1.9MB/s) Job-103158846784545651976749201.csv
Downloading [##########----------]47.77% 12.0MB/25.1MB (1.9MB/s) Job-103158846784545651976749201.csv
Downloading [###########---------]55.73% 14.0MB/25.1MB (1.9MB/s) Job-103158846784545651976749201.csv
Downloading [#############-------]63.69% 16.0MB/25.1MB (1.9MB/s) Job-103158846784545651976749201.csv
Downloading [##############------]71.65% 18.0MB/25.1MB (1.9MB/s) Job-103158846784545651976749201.csv
Downloading [################----]79.61% 20.0MB/25.1MB (2.0MB/s) Job-103158846784545651976749201.csv
Downloading [##################--]87.58% 22.0MB/25.1MB (2.0MB/s) Job-103158846784545651976749201.csv
Downloading [###################-]95.54% 24.0MB/25.1MB (2.1MB/s) Job-103158846784545651976749201.csv
Downloading [####################]100.00% 25.1MB/25.1MB (2.1MB/s) Job-103158846784545651976749201.csv Done...
# Get metaviper scores: download the whole Synapse table referenced by
# `metaviper_scores` as a data frame.
mtab <- synapser::synTableQuery(
  paste('select * from', metaviper_scores)
)$asDataFrame()
##
Building the CSV... [#-------------------]4.93% 72426/1467984
Building the CSV... [###-----------------]15.80% 231871/1467984
Building the CSV... [####----------------]20.84% 305867/1467984
Building the CSV... [######--------------]31.85% 467599/1467984
Building the CSV... [#########-----------]44.36% 651131/1467984
Building the CSV... [#########-----------]44.36% 651131/1467984
Building the CSV... [####################]100.00% 1467984/1467984 Done...
Downloading [--------------------]2.47% 2.0MB/81.0MB (2.0MB/s) Job-103158857591122940761569476.csv
Downloading [#-------------------]4.94% 4.0MB/81.0MB (2.2MB/s) Job-103158857591122940761569476.csv
Downloading [#-------------------]7.40% 6.0MB/81.0MB (2.4MB/s) Job-103158857591122940761569476.csv
Downloading [##------------------]9.87% 8.0MB/81.0MB (2.5MB/s) Job-103158857591122940761569476.csv
Downloading [##------------------]12.34% 10.0MB/81.0MB (2.5MB/s) Job-103158857591122940761569476.csv
Downloading [###-----------------]14.81% 12.0MB/81.0MB (2.6MB/s) Job-103158857591122940761569476.csv
Downloading [###-----------------]17.27% 14.0MB/81.0MB (2.7MB/s) Job-103158857591122940761569476.csv
Downloading [####----------------]19.74% 16.0MB/81.0MB (2.8MB/s) Job-103158857591122940761569476.csv
Downloading [####----------------]22.21% 18.0MB/81.0MB (2.8MB/s) Job-103158857591122940761569476.csv
Downloading [#####---------------]24.68% 20.0MB/81.0MB (2.9MB/s) Job-103158857591122940761569476.csv
Downloading [#####---------------]27.14% 22.0MB/81.0MB (3.0MB/s) Job-103158857591122940761569476.csv
Downloading [######--------------]29.61% 24.0MB/81.0MB (3.0MB/s) Job-103158857591122940761569476.csv
Downloading [######--------------]32.08% 26.0MB/81.0MB (3.1MB/s) Job-103158857591122940761569476.csv
Downloading [#######-------------]34.55% 28.0MB/81.0MB (3.2MB/s) Job-103158857591122940761569476.csv
Downloading [#######-------------]37.01% 30.0MB/81.0MB (3.3MB/s) Job-103158857591122940761569476.csv
Downloading [########------------]39.48% 32.0MB/81.0MB (3.4MB/s) Job-103158857591122940761569476.csv
Downloading [########------------]41.95% 34.0MB/81.0MB (3.5MB/s) Job-103158857591122940761569476.csv
Downloading [#########-----------]44.42% 36.0MB/81.0MB (3.6MB/s) Job-103158857591122940761569476.csv
Downloading [#########-----------]46.88% 38.0MB/81.0MB (3.7MB/s) Job-103158857591122940761569476.csv
Downloading [##########----------]49.35% 40.0MB/81.0MB (3.8MB/s) Job-103158857591122940761569476.csv
Downloading [##########----------]51.82% 42.0MB/81.0MB (3.9MB/s) Job-103158857591122940761569476.csv
Downloading [###########---------]54.29% 44.0MB/81.0MB (4.0MB/s) Job-103158857591122940761569476.csv
Downloading [###########---------]56.76% 46.0MB/81.0MB (4.1MB/s) Job-103158857591122940761569476.csv
Downloading [############--------]59.22% 48.0MB/81.0MB (4.2MB/s) Job-103158857591122940761569476.csv
Downloading [############--------]61.69% 50.0MB/81.0MB (4.3MB/s) Job-103158857591122940761569476.csv
Downloading [#############-------]64.16% 52.0MB/81.0MB (4.4MB/s) Job-103158857591122940761569476.csv
Downloading [#############-------]66.63% 54.0MB/81.0MB (4.5MB/s) Job-103158857591122940761569476.csv
Downloading [##############------]69.09% 56.0MB/81.0MB (4.6MB/s) Job-103158857591122940761569476.csv
Downloading [##############------]71.56% 58.0MB/81.0MB (4.7MB/s) Job-103158857591122940761569476.csv
Downloading [###############-----]74.03% 60.0MB/81.0MB (4.8MB/s) Job-103158857591122940761569476.csv
Downloading [###############-----]76.50% 62.0MB/81.0MB (4.8MB/s) Job-103158857591122940761569476.csv
Downloading [################----]78.96% 64.0MB/81.0MB (5.0MB/s) Job-103158857591122940761569476.csv
Downloading [################----]81.43% 66.0MB/81.0MB (5.0MB/s) Job-103158857591122940761569476.csv
Downloading [#################---]83.90% 68.0MB/81.0MB (5.1MB/s) Job-103158857591122940761569476.csv
Downloading [#################---]86.37% 70.0MB/81.0MB (5.2MB/s) Job-103158857591122940761569476.csv
Downloading [##################--]88.83% 72.0MB/81.0MB (5.3MB/s) Job-103158857591122940761569476.csv
Downloading [##################--]91.30% 74.0MB/81.0MB (5.4MB/s) Job-103158857591122940761569476.csv
Downloading [###################-]93.77% 76.0MB/81.0MB (5.5MB/s) Job-103158857591122940761569476.csv
Downloading [###################-]96.24% 78.0MB/81.0MB (5.6MB/s) Job-103158857591122940761569476.csv
Downloading [####################]98.70% 80.0MB/81.0MB (5.6MB/s) Job-103158857591122940761569476.csv
Downloading [####################]100.00% 81.0MB/81.0MB (5.7MB/s) Job-103158857591122940761569476.csv Done...
# Get RF loadings: download the table referenced by `rf_mp`, derive
# `latent_var` from LV_Full with every backtick removed, and keep only the
# four tumor-type columns followed by `latent_var`.
rftab <- synapser::synTableQuery(paste('select * from', rf_mp))$asDataFrame() %>%
  mutate(latent_var = gsub('`', '', LV_Full)) %>%
  select(
    `Cutaneous Neurofibroma`,
    `Neurofibroma`,
    `Malignant Peripheral Nerve Sheath Tumor`,
    `Plexiform Neurofibroma`,
    latent_var
  )
##
[####################]100.00% 1/1 Done...
Downloading [####################]100.00% 79.8kB/79.8kB (481.6kB/s) Job-103158862193713213287060949.csv Done...
# Specimen IDs that occur in both the LV table and the metaviper table.
samps <- intersect(dtab$specimenID, mtab$specimenID)

# Get RF-selected latent variables: drop the Synapse bookkeeping columns and
# rename LatentVar to latent_var so it can be used as a join key below.
lvs <- synTableQuery("select * from syn21318452")$asDataFrame() %>%
  select(-ROW_ID, -ROW_VERSION) %>%
  rename(latent_var = LatentVar)
##
[####################]100.00% 1/1 Done...
Downloading [####################]100.00% 3.7kB/3.7kB (1.9MB/s) Job-103158875804676108625138343.csv Done...
# LV values for the shared specimens, right-joined onto the RF-selected LVs so
# that every row of `lvs` is retained.
mp_res <- right_join(
  dtab %>%
    subset(specimenID %in% samps) %>%
    select(latent_var, value, specimenID, tumorType),
  lvs,
  by = 'latent_var'
)

# Attach the de-duplicated metaviper scores (one row per specimen/gene) to the
# LV values; every row of `mp_res` is retained.
combined <- right_join(
  distinct(select(mtab, specimenID, metaviperscore, gene, sex)),
  mp_res,
  by = 'specimenID'
)
#now compute some basic stats
#mp_stats<-mp_res%>%
# rowwise()%>%mutate(All=max(`Cutaneous Neurofibroma`,`Plexiform Neurofibroma`,`Malignant Peripheral Nerve Sheath Tumor`,Neurofibroma))%>%
# rowwise()%>%
# mutate(MeanVal=mean(c(`Cutaneous Neurofibroma`,`Plexiform Neurofibroma`,`Malignant Peripheral Nerve Sheath Tumor`,Neurofibroma)))
#DT::datatable(mp_stats)
With the RF-selected LVs for each random forest prediction, we compute the correlation between each LV and every metaviper protein score across samples, so that we can plot the proteins that correlate with them.
# For every (latent_var, gene) pair, correlate the LV value with the metaviper
# score using pairwise-complete observations, and count the distinct specimens
# in the group. The result stays grouped by latent_var.
# (A filter on `top10` latent variables used to sit here and is disabled.)
corVals <- summarize(
  group_by(combined, latent_var, gene),
  corVal = cor(value, metaviperscore, use = 'pairwise.complete.obs'),
  numSamps = n_distinct(specimenID)
)
corVals
## # A tibble: 597,016 x 4
## # Groups: latent_var [98]
## latent_var gene corVal numSamps
## <chr> <chr> <dbl> <int>
## 1 1,REACTOME_MRNA_SPLICING AATF 0.436 77
## 2 1,REACTOME_MRNA_SPLICING ABCA1 -0.570 77
## 3 1,REACTOME_MRNA_SPLICING ABCC8 -0.327 77
## 4 1,REACTOME_MRNA_SPLICING ABCC9 -0.619 77
## 5 1,REACTOME_MRNA_SPLICING ABCG1 -0.521 77
## 6 1,REACTOME_MRNA_SPLICING ABCG4 0.239 77
## 7 1,REACTOME_MRNA_SPLICING ABI1 -0.356 77
## 8 1,REACTOME_MRNA_SPLICING ABL1 -0.0887 77
## 9 1,REACTOME_MRNA_SPLICING ABL2 -0.318 77
## 10 1,REACTOME_MRNA_SPLICING ABLIM3 -0.652 77
## # … with 597,006 more rows
# Store the correlation table in Synapse under parent syn21046734.
tab <- synBuildTable(
  'RF-selected LVs correlated with Metaviper Activity',
  parent = 'syn21046734',
  corVals
)
synStore(tab)
##
Uploading [--------------------]0.00% 0.0bytes/27.0MB file14ab75dcc638f
Uploading [######--------------]29.63% 8.0MB/27.0MB (626.8kB/s) file14ab75dcc638f
Uploading [############--------]59.26% 16.0MB/27.0MB (621.1kB/s) file14ab75dcc638f
Uploading [##################--]88.89% 24.0MB/27.0MB (654.5kB/s) file14ab75dcc638f
Uploading [####################]100.00% 27.0MB/27.0MB (597.6kB/s) file14ab75dcc638f Done...
Update: 0 [--------------------]0.00% 0/9223372036854775807
Update: 0 [--------------------]0.00% 0/9223372036854775807
Update: 0 [--------------------]0.00% 0/9223372036854775807
Update: 0 [####################]100.00% 9223372036854775807/9223372036854775807 Done...
## <synapseclient.table.CsvFileTable object at 0x11ecc7f60>
# (disabled) corVals <- corVals %>% subset(latent_var %in% unique(unlist(top10)))
## now how do we bracket them?
## plot correlation distributions by cell type and method.

# library() stops immediately if ggplot2 is missing; require() would only warn
# and return FALSE, deferring the failure to the first ggplot() call.
library(ggplot2)

## first re-order variables to plot
# Long table with one row per (latent_var, remaining column) pair: every column
# of mp_res other than specimenID, tumorType, value and latent_var is gathered
# into key `tumorType` / value `top40`, then de-duplicated.
top.df <- mp_res %>%
  select(-c(specimenID, tumorType, value)) %>%
  gather(key = "tumorType", value = "top40", -latent_var) %>%
  unique()

# Boxplot of the per-protein correlations for each latent variable that
# appears in top.df.
p <- corVals %>%
  ungroup() %>%
  subset(latent_var %in% unique(top.df$latent_var)) %>%
  # mutate(LatentVariable = stringr::str_trim(as.character(latent_var), 20)) %>%
  ggplot() +
  geom_boxplot(aes(x = latent_var, y = corVal)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Correlation of metaviper proteins with lv")
print(p)
There are some proteins that show up as highly correlated. By choosing a threshold, we can evaluate what they are in more detail.
These plots represent the top latent variables for a predictor of each tumor type and the proteins that are correlated with them.
# Minimum correlation for a protein to count as correlated with an LV.
corthresh <- 0.75

## now filter to the latent variables with correlated proteins
# Membership is tested against lvs$latent_var only. The previous
# unique(unlist(lvs)) pooled the values of every column of `lvs`, so any LV
# name that happened to equal a value in another column would also have matched.
cor_cell_types <- corVals %>%
  ungroup() %>%
  subset(corVal > corthresh & latent_var %in% lvs$latent_var) %>%
  select(latent_var) %>%
  distinct()

print(paste('we found',nrow(cor_cell_types),'lvs with some protein correlation greater than',corthresh))
DT::datatable(cor_cell_types)
# For every latent variable in cor_cell_types, scatter-plot the LV value
# against the metaviper score of its most correlated proteins (at most 12).
#
# invisible(lapply(...)) over the column replaces apply() over the data frame:
# apply() coerces the data frame to a matrix and returns a list of the plots,
# which was then auto-printed as "[[1]]", "[[2]]", ... after the last plot.
invisible(lapply(as.character(cor_cell_types$latent_var), function(ct) {
  # Proteins whose correlation with this LV exceeds the threshold, best first.
  genes <- corVals %>%
    ungroup() %>%
    subset(latent_var == ct & corVal > corthresh) %>%
    arrange(desc(corVal))

  if (nrow(genes) > 12) {
    # Keep the 12 best proteins and report the cutoff that was actually applied.
    new.corthresh <- format(genes$corVal[12], digits = 3)
    genes <- genes[1:12, ]
  } else {
    new.corthresh <- corthresh
  }

  # LV values and metaviper scores for the selected proteins and this LV.
  scores <- subset(combined, gene %in% genes$gene & latent_var == ct)

  # Labels from top.df flagged 'Y' for this LV, joined for the plot title.
  # NOTE(review): 'Y' presumably marks the tumor types whose predictor selected
  # this LV -- confirm against the syn21318452 table.
  # The original had a stray `paste(collapse=',')` statement here (a pipe was
  # missing after unique()); it had no effect and has been removed.
  dis <- subset(top.df, latent_var == ct & top40 == 'Y')
  dis_label <- paste(unique(dis$tumorType), collapse = ',')

  p2 <- ggplot(scores) +
    geom_point(aes(x = value, y = metaviperscore, col = gene, shape = tumorType)) +
    # scale_x_log10() +
    ggtitle(paste(ct, 'correlation >', new.corthresh, '\n', dis_label))

  cat(ct) ## print out so we can search
  print(p2)
  # ggsave(paste0(m,'predictions of',gsub(" ","",gsub("/","",ct)),'cor',new.corthresh,'.pdf'))
}))
## 1,REACTOME_MRNA_SPLICING
## 13,REACTOME_GLUCOSE_METABOLISM
## 22,KEGG_PPAR_SIGNALING_PATHWAY
## 24,PID_DELTANP63PATHWAY
## 39,SVM Dendritic cells resting
## 4,REACTOME_NEURONAL_SYSTEM
## 45,REACTOME_RNA_POL_I_PROMOTER_OPENING
## 517,REACTOME_SIGNALING_BY_EGFR_IN_CANCER
## 720,PID_FANCONI_PATHWAY
## 767,SVM B cells naive
## 848,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY
## 915,MIPS_SPLICEOSOME
## 928,DMAP_ERY3
## 953,IRIS_Monocyte-Day1
## LV 100
## LV 138
## LV 15
## LV 167
## LV 185
## LV 187
## LV 195
## LV 229
## LV 238
## LV 272
## LV 303
## LV 304
## LV 308
## LV 32
## LV 334
## LV 376
## LV 379
## LV 380
## LV 396
## LV 434
## LV 445
## LV 496
## LV 519
## LV 520
## LV 533
## LV 546
## LV 624
## LV 625
## LV 653
## LV 665
## LV 751
## LV 835
## LV 851
## LV 9
## LV 909
## LV 917
## LV 957
## LV 984
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
##
## [[19]]
##
## [[20]]
##
## [[21]]
##
## [[22]]
##
## [[23]]
##
## [[24]]
##
## [[25]]
##
## [[26]]
##
## [[27]]
##
## [[28]]
##
## [[29]]
##
## [[30]]
##
## [[31]]
##
## [[32]]
##
## [[33]]
##
## [[34]]
##
## [[35]]
##
## [[36]]
##
## [[37]]
##
## [[38]]
##
## [[39]]
##
## [[40]]
##
## [[41]]
##
## [[42]]
##
## [[43]]
##
## [[44]]
##
## [[45]]
##
## [[46]]
##
## [[47]]
##
## [[48]]
##
## [[49]]
##
## [[50]]
##
## [[51]]
##
## [[52]]